import nltk
import re
import urllib.request as up

# Page whose text is downloaded and tokenized when this file runs as a script.
URL = "https://docs.python.org/3/library/re.html"

# Raw string: "\w" in a normal string literal is an invalid escape sequence.
_WORD_RE = re.compile(r"\w+")


def fetch_html(url: str) -> str:
    """Download *url* and return its body decoded as UTF-8.

    The response is opened in a ``with`` block so the underlying
    connection is closed even if reading or decoding raises.
    """
    with up.urlopen(url) as response:
        return response.read().decode("utf-8")


def tokenize(text: str) -> list:
    """Return every maximal run of word characters in *text*, in order.

    Matching ``\\w+`` yields the same tokens as splitting on ``\\W+``,
    except that it never produces the empty strings that splitting emits
    when *text* starts or ends with a non-word character (HTML always
    starts with ``<``), so the token count is not inflated.
    """
    return _WORD_RE.findall(text)


if __name__ == "__main__":
    # Assigned at module level so `html` and `tokens` remain globals,
    # as they were in the unguarded script.
    html = fetch_html(URL)
    tokens = tokenize(html)

    print(f"Total no oof tokens:{len(tokens)}")
    print(tokens[0:100])
